library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ ggplot2 3.5.1 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr)
# Read the player statistics CSV into a tibble
all_stats_df <- readr::read_csv(file = "../data/Player_Stats.csv")
## Rows: 563 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Team_Alias, Player_Name, Position
## dbl (11): Minutes, PPG, APG, DefRBD, OffRBD, SPG, BPG, Paint_PPG, att_3PT_pg...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep only rows with at least 528 minutes, then preview the first six rows
Player_data <- dplyr::filter(all_stats_df, Minutes >= 528)
head(Player_data, n = 6L)
## # A tibble: 6 × 14
## Team_Alias Player_Name Minutes PPG APG DefRBD OffRBD SPG BPG Paint_PPG
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 MIN Kyle Ander… 1782 6.4 4.2 2.7 0.76 0.9 0.59 4.28
## 2 MIN Rudy Gobert 2593 14 1.3 9.17 3.75 0.68 2.13 10.6
## 3 MIN Mike Conley 2193 11.4 5.9 2.38 0.49 1.16 0.22 2.63
## 4 MIN Naz Reid 1964 13.5 1.3 4.32 0.9 0.78 0.9 5.68
## 5 MIN Jaden McDa… 2105 10.5 1.4 2.33 0.78 0.88 0.58 5.36
## 6 MIN Karl-Antho… 2026 21.8 3 6.79 1.52 0.69 0.66 9.9
## # ℹ 4 more variables: att_3PT_pg <dbl>, made_3PT_pg <dbl>, ATO <dbl>,
## # Position <chr>
# Show each column's type and leading values for the filtered data
utils::str(Player_data)
## spc_tbl_ [300 × 14] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Team_Alias : chr [1:300] "MIN" "MIN" "MIN" "MIN" ...
## $ Player_Name: chr [1:300] "Kyle Anderson" "Rudy Gobert" "Mike Conley" "Naz Reid" ...
## $ Minutes : num [1:300] 1782 2593 2193 1964 2105 ...
## $ PPG : num [1:300] 6.4 14 11.4 13.5 10.5 21.8 3.5 25.9 8 3.3 ...
## $ APG : num [1:300] 4.2 1.3 5.9 1.3 1.4 3 2 5.1 2.5 1 ...
## $ DefRBD : num [1:300] 2.7 9.17 2.38 4.32 2.33 6.79 0.96 4.78 1.61 2.06 ...
## $ OffRBD : num [1:300] 0.76 3.75 0.49 0.9 0.78 1.52 0.32 0.66 0.43 1.49 ...
## $ SPG : num [1:300] 0.9 0.68 1.16 0.78 0.88 0.69 0.63 1.28 0.78 0.24 ...
## $ BPG : num [1:300] 0.59 2.13 0.22 0.9 0.58 0.66 0.13 0.53 0.51 0.29 ...
## $ Paint_PPG : num [1:300] 4.28 10.61 2.63 5.68 5.36 ...
## $ att_3PT_pg : num [1:300] 0.61 0.04 5.33 5.04 3.54 5.27 1.59 6.73 4.09 0.02 ...
## $ made_3PT_pg: num [1:300] 0.14 0 2.36 2.09 1.19 2.19 0.75 2.41 1.6 0 ...
## $ ATO : num [1:300] 3.64 0.86 4.4 0.95 1.16 1.07 5.79 1.68 2.68 1.76 ...
## $ Position : chr [1:300] "F" "C" "G" "C" ...
## - attr(*, "spec")=
## .. cols(
## .. Team_Alias = col_character(),
## .. Player_Name = col_character(),
## .. Minutes = col_double(),
## .. PPG = col_double(),
## .. APG = col_double(),
## .. DefRBD = col_double(),
## .. OffRBD = col_double(),
## .. SPG = col_double(),
## .. BPG = col_double(),
## .. Paint_PPG = col_double(),
## .. att_3PT_pg = col_double(),
## .. made_3PT_pg = col_double(),
## .. ATO = col_double(),
## .. Position = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
# Target type for every column: the three text columns become factors,
# the eleven statistic columns are coerced to numeric
factor_columns <- c("Team_Alias", "Player_Name", "Position")
numeric_columns <- c(
  "Minutes", "PPG", "APG", "DefRBD", "OffRBD", "SPG", "BPG",
  "Paint_PPG", "att_3PT_pg", "made_3PT_pg", "ATO"
)
# Visit each listed column once (factors first, then numerics) and apply the
# converter that matches the group the column belongs to
for (col in c(factor_columns, numeric_columns)) {
  Player_data[[col]] <- if (col %in% factor_columns) {
    as.factor(Player_data[[col]])
  } else {
    as.numeric(Player_data[[col]])
  }
}
# Print the structure again to confirm the converted column types
str(Player_data)
## spc_tbl_ [300 × 14] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Team_Alias : Factor w/ 25 levels "ATL","BOS","CHA",..: 15 15 15 15 15 15 15 15 15 5 ...
## $ Player_Name: Factor w/ 292 levels "Aaron Gordon",..: 187 247 214 220 120 168 154 15 223 277 ...
## $ Minutes : num [1:300] 1782 2593 2193 1964 2105 ...
## $ PPG : num [1:300] 6.4 14 11.4 13.5 10.5 21.8 3.5 25.9 8 3.3 ...
## $ APG : num [1:300] 4.2 1.3 5.9 1.3 1.4 3 2 5.1 2.5 1 ...
## $ DefRBD : num [1:300] 2.7 9.17 2.38 4.32 2.33 6.79 0.96 4.78 1.61 2.06 ...
## $ OffRBD : num [1:300] 0.76 3.75 0.49 0.9 0.78 1.52 0.32 0.66 0.43 1.49 ...
## $ SPG : num [1:300] 0.9 0.68 1.16 0.78 0.88 0.69 0.63 1.28 0.78 0.24 ...
## $ BPG : num [1:300] 0.59 2.13 0.22 0.9 0.58 0.66 0.13 0.53 0.51 0.29 ...
## $ Paint_PPG : num [1:300] 4.28 10.61 2.63 5.68 5.36 ...
## $ att_3PT_pg : num [1:300] 0.61 0.04 5.33 5.04 3.54 5.27 1.59 6.73 4.09 0.02 ...
## $ made_3PT_pg: num [1:300] 0.14 0 2.36 2.09 1.19 2.19 0.75 2.41 1.6 0 ...
## $ ATO : num [1:300] 3.64 0.86 4.4 0.95 1.16 1.07 5.79 1.68 2.68 1.76 ...
## $ Position : Factor w/ 7 levels "C","C-F","F",..: 3 1 6 1 3 2 6 6 6 3 ...
## - attr(*, "spec")=
## .. cols(
## .. Team_Alias = col_character(),
## .. Player_Name = col_character(),
## .. Minutes = col_double(),
## .. PPG = col_double(),
## .. APG = col_double(),
## .. DefRBD = col_double(),
## .. OffRBD = col_double(),
## .. SPG = col_double(),
## .. BPG = col_double(),
## .. Paint_PPG = col_double(),
## .. att_3PT_pg = col_double(),
## .. made_3PT_pg = col_double(),
## .. ATO = col_double(),
## .. Position = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
# Drop the identifier columns so only the statistics and Position remain
Player_data_without_names <- Player_data[
  ,
  setdiff(names(Player_data), c("Player_Name", "Team_Alias"))
]
# Pairwise Gower dissimilarities over the remaining columns
# (numeric statistics plus the Position factor)
library(cluster)
dissimilarity_matrix <- daisy(x = Player_data_without_names, metric = "gower")
library(cluster)
library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Classical MDS: project the dissimilarities onto two dimensions
mds_result <- cmdscale(dissimilarity_matrix, k = 2)
# Turn the coordinate matrix into a data frame with readable column names
mds_data <- mds_result %>%
  as.data.frame() %>%
  setNames(c("MDS1", "MDS2"))
# Rows are in the same order as Player_data, so names can be attached directly
mds_data$Player_Name <- Player_data$Player_Name
# Hierarchical clustering on the same dissimilarities, cut into 5 groups
clusters <- dissimilarity_matrix %>%
  hclust() %>%
  cutree(k = 5)
mds_data$cluster <- as.factor(clusters)
# Players labelled by hand rather than by their hierarchical cluster
superstar_big_men <- c(
  "Nikola Jokić",
  "Victor Wembanyama",
  "Domantas Sabonis",
  "Anthony Davis",
  "Giannis Antetokounmpo"
)
# Display label for each numeric cluster id produced by cutree()
cluster_names <- c(
  "1" = "Role-Playing Forwards",
  "2" = "Defensive Centers",
  "3" = "Playmaking Guards",
  "4" = "Limited-Minute Guards",
  "5" = "Elite All-Around Forwards"
)
# Label every player in one pass: hand-picked players get the manual label,
# everyone else gets the display label looked up from their cluster id
mds_data$cluster_name <- ifelse(
  mds_data$Player_Name %in% superstar_big_men,
  "Superstar Big Men",
  cluster_names[as.character(clusters)]
)
# Explicit label -> colour mapping for the plot.
# An unnamed palette is matched to the labels by position (the labels in
# sorted order), which hides which colour belongs to which cluster and makes
# plotly interpolate the palette when a label is missing from the data.
# Naming each entry pins the mapping; the assignments below are the same ones
# the positional palette produced for these six labels.
cluster_colors <- c(
  "Defensive Centers" = "blue",
  "Elite All-Around Forwards" = "purple",
  "Limited-Minute Guards" = "black",
  "Playmaking Guards" = "orange",
  "Role-Playing Forwards" = "red",
  "Superstar Big Men" = "green"
)
# Interactive scatter plot of the two MDS dimensions, one marker per player,
# coloured by cluster label; the hover text shows the player and the label
interactive_plot <- plot_ly(
  data = mds_data,
  x = ~MDS1,
  y = ~MDS2,
  color = ~cluster_name,
  colors = cluster_colors,
  text = ~paste(
    "Player Name:", Player_Name,
    "<br>Cluster:", cluster_name
  ),
  type = "scatter",
  mode = "markers",
  marker = list(size = 8, opacity = 0.8)
) %>%
  layout(
    title = "Interactive MDS Plot of Players by Cluster",
    xaxis = list(title = "MDS Dimension 1"),
    yaxis = list(title = "MDS Dimension 2"),
    legend = list(title = list(text = "Cluster Names"))
  )
# Show the interactive plot
interactive_plot